rm(list=ls())
library(foreign)
library(Zelig) #requires Zelig 3.5.5 for compatibility
                #available at https://cran.r-project.org/src/contrib/Archive/Zelig/

#number of imputed datasets
m <- 10 

#setwd(<Your replication directory>)
setwd("C:/Users/Martin Steinwand/Documents/Project Maritime Boundaries/Analysis/R&RJCR/ReplicationPackage")

#load raw data
alldat <- read.dta("Boundary Data_work.8-22-15.dta")
attach(alldat)
if(id2[2]-id2[1]!=0)cat("Data not sorted \n")
if(year[2]-year[1]!=1)cat("Data not sorted \n")
alldat$difshare <- -(alldat$difshare-100)
workdat <- data.frame(subset(alldat, ongoing2==0))
detach(alldat)

#load imputed data
load(file="BDdat8-2-12y5dum.NoOngoing.RData")

#create new variables
newdat <- transform(newdat, ddema=as.numeric(politya>=7))
newdat <- transform(newdat, ddemb=as.numeric(polityb>=7))
newdat <- transform(newdat, autoca=as.numeric(politya<=-5))
newdat <- transform(newdat, autocb=as.numeric(polityb<=-5 & polityb>=-10))
newdat <- transform(newdat, jointdem=as.numeric(ddema==1 & ddemb==1))
newdat <- transform(newdat, jointautoc=as.numeric(autoca==1 & autocb==1))
newdat <- transform(newdat, demautoc=as.numeric((ddema==1 & autocb==1) | (ddemb==1 & autoca==1)))
newdat <- transform(newdat, gdpdif=abs(gdpa2 - gdpb2)/(gdpa2 + gdpb2))
newdat <- transform(newdat, dependsingle=as.numeric((dependa==1 & dependb==0)|(dependb==1 & dependa==0)))
newdat <- transform(newdat, dependboth=as.numeric(dependa==1 & dependb==1))
newdat <- transform(newdat, coastdif=abs(coastla-coastlb)/(coastla+coastlb))
newdat <- transform(newdat, dcap=abs(capa-capb))
newdat <- transform(newdat, lcoastdif = log(as.numeric(coastdif<=0)*.001+as.numeric(coastdif>0)*coastdif))
newdat <- transform(newdat, vetotot=(vetoa + vetob)/2)
newdat <- transform(newdat, vetodif=abs(vetoa - vetob))
newdat <- transform(newdat, tsp1=workdat$tsp1)
newdat <- transform(newdat, tsp2=workdat$tsp2)
newdat <- transform(newdat, tsp3=workdat$tsp3)
newdat <- transform(newdat, totag=workdat$totag)
newdat <- transform(newdat, depend=as.numeric(dependsingle==1 | dependboth==1))
newdat <- transform(newdat, los=as.numeric(lossingle==1 | losboth ==1))
newdat <- transform(newdat, mid3y=workdat$mid3y)
newdat <- transform(newdat, mid5y=workdat$mid5y)
newdat <- transform(newdat, mid10y=workdat$mid10y)
newdat <- transform(newdat, hmid3y=workdat$hmid3y)
newdat <- transform(newdat, hmid5y=workdat$hmid5y)
newdat <- transform(newdat, hmid10y=workdat$hmid10y)
#additional transformations
newdat <- transform(newdat, difshare=workdat$difshare)
newdat <- transform(newdat, difshare.sc=(workdat$difshare*.01*(189-1)+.5)/189)#rescaled for beta model
            #rescaling according to Smithson and Verkuilen (2006)
newdat <- transform(newdat, diffrac=workdat$difshare/100) #for fractional logit
newdat <- transform(newdat, difdisc=as.numeric(workdat$difshare>=50))
newdat <- transform(newdat, difdisc2=as.numeric(workdat$difshare>0))
#create interactions
newdat <- transform(newdat, JdemOildis=jointdem * workdat$oildisc_f)
newdat <- transform(newdat, JdemOilprod=jointdem * workdat$oilprod_f)
newdat <- transform(newdat, JdemTrade=jointdem * workdat$tradetot1000_f)
newdat <- transform(newdat, JdemFish=jointdem * workdat$fishtot_f)
newdat <- transform(newdat, DcapOildis=dcap * workdat$oildisc_f)
newdat <- transform(newdat, DcapOilprod=dcap * workdat$oilprod_f)
newdat <- transform(newdat, DcapTrade=dcap * workdat$tradetot1000_f)
newdat <- transform(newdat, OilProdDisc=workdat$oildisc_f*workdat$oilprod_f)
newdat <- transform(newdat, oilone=workdat$oilone)
newdat <- transform(newdat, oiltwo=workdat$oiltwo)
newdat <- transform(newdat, oileither=workdat$oileither)#selection equation
newdat <- transform(newdat, oilproddisc=workdat$oileither * workdat$oildisc_f)#selection equation
#now add the dummies for directed oil discovery
newdat <- transform(newdat, oildiscboth = as.numeric(workdat$oildisa==1 & 
                    workdat$oildisb==1))
newdat <- transform(newdat, oildiscone = as.numeric((workdat$oildisa==1 |
                    workdat$oildisb==1) & workdat$oildisa!= workdat$oildisb))
newdat <- transform(newdat, oildiscany= oildiscone+oildiscboth)
newdat <- transform(newdat, totdif=workdat$totdif)
newdat <- transform(newdat, ltotdif=log(workdat$totdif+1))
#routine for printing results
sumres <- function(obj){
    vn <- rownames(obj)
    cat("variable\t", "coef ", "p-value \n", sep="")
    for (i in 1:nrow(obj)){
        cat(vn[i], "\t", obj[i,1], " ", 2*pnorm(-abs(obj[i,1]/obj[i,2])), "\n", sep="")
    }
}

#Obtain first-stage estimates
sform <- formula(agreement ~ jointdem + oildisc_f + oileither+ oilproddisc+
            fishtot_f + tradetot1000_f + los + totag+ dependsingle + dependboth + 
             terrdisp +  voteshare + gdpdif + dcap + tsp1 + tsp2 + tsp3 +
            y5dum1 + y5dum2+ y5dum3 + y5dum4 + y5dum5 + y5dum6 +y5dum7 + y5dum8 +y5dum9 -1)
s.mod <- zelig(sform, 
            data=newdat$imputations, model="probit", cite=FALSE)
#Selection correction
n <- length(s.mod[[1]]$residuals)
imr <-matrix(NA, n,m)
dhat.avg<- rep(NA,m)
for(i in 1:m){
    aux.mod<-glm(sform, 
            data=newdat$imputations[[i]], binomial(link = "probit"), na.action=na.exclude)
    #compute inverse mills ratio 
    imr.mis <- dnorm(aux.mod$linear.predictors)/pnorm(aux.mod$linear.predictors)
    imr[,i] <- naresid(aux.mod$na.action, imr.mis)
    #collect ingredients for estimates of sigma and rho
    dhat.mis <- imr.mis * (imr.mis - aux.mod$linear.predictors)
    #dhat <- naresid(aux.mod$na.action, dhat.mis)
    #calculate average dhat for obs included in 2nd equation only? no...
    dhat.avg[i] <- sum(dhat.mis)/length(dhat.mis)
}

########
#model 1
modform <- formula(difdisc ~ imr[,j] +oildisc_f + oileither + 
            oilproddisc)
j<-1 
if (all.vars(modform[[3]])[2]=="j") k <- length(all.vars(modform[[3]]))  else k <- length(all.vars(modform[[3]])) +1
n.mod.full <- vector("list", m)
rho  <- rep(NA, m)
sterr  <- matrix(NA, k, m)
tstat <- matrix(NA, k, m)
for(j in 1:m){
    n.mod.full[[j]] <- glm(modform, data=newdat$imputations[[j]]
         ,family=binomial(link = "probit"), na.action=na.omit)
    varcov <- sandwich(n.mod.full[[j]])
    sterr[,j] <- sqrt(diag(varcov[1:k,1:k]))
    tstat[,j] <- coefficients(n.mod.full[[j]])[1:k]/sterr[,j]
    
        #obtain estimates of rho and sigma
    varre <- t(as.matrix(n.mod.full[[j]]$residuals))%*%as.matrix(n.mod.full[[j]]$residuals)/length(n.mod.full[[j]]$residuals)
    sigsqhat <- varre+dhat.avg[j]*coefficients(n.mod.full[[j]])[2]^2
    rho[j] <- sqrt(coefficients(n.mod.full[[j]])[2]^2/sigsqhat)
}
getcoef <- function(x){
    x$coefficients
    #x$coefficients$mean #beta regression only 
}
res <- mi.meld(t(sapply(n.mod.full, getcoef)), t(sterr))
out <- t(rbind(res$q.mi, res$se.mi, res$q.mi/res$se.mi))
sumres(out)


########
#model 2
modform <- formula(difdisc ~ imr[,j]
            +jointdem + oildisc_f + oileither + oilproddisc+ fishtot_f+tradetot1000_f + los
            +dependsingle+ dependboth + terrdisp + lcoastdif + gdpdif + dcap)
j<-1 
if (all.vars(modform[[3]])[2]=="j") k <- length(all.vars(modform[[3]]))  else k <- length(all.vars(modform[[3]])) +1
n.mod.full <- vector("list", m)
rho  <- rep(NA, m)
sterr  <- matrix(NA, k, m)
tstat <- matrix(NA, k, m)
for(j in 1:m){
    n.mod.full[[j]] <- glm(modform, data=newdat$imputations[[j]]
         ,family=binomial(link = "probit"), na.action=na.omit)
    varcov <- sandwich(n.mod.full[[j]])
    sterr[,j] <- sqrt(diag(varcov[1:k,1:k]))
    tstat[,j] <- coefficients(n.mod.full[[j]])[1:k]/sterr[,j]
    
        #obtain estimates of rho and sigma
    varre <- t(as.matrix(n.mod.full[[j]]$residuals))%*%as.matrix(n.mod.full[[j]]$residuals)/length(n.mod.full[[j]]$residuals)
    sigsqhat <- varre+dhat.avg[j]*coefficients(n.mod.full[[j]])[2]^2
    rho[j] <- sqrt(coefficients(n.mod.full[[j]])[2]^2/sigsqhat)
}
getcoef <- function(x){
    x$coefficients
}
res <- mi.meld(t(sapply(n.mod.full, getcoef)), t(sterr))
out <- t(rbind(res$q.mi, res$se.mi, res$q.mi/res$se.mi))
sumres(out)


########
#model 3
modform <- formula(difdisc ~ imr[,j]
            +jointdem + oildiscboth + oildiscone + oileither + 
            oilproddisc+ fishtot_f+ tradetot1000_f + los+
            dependsingle+ dependboth + terrdisp + lcoastdif + gdpdif + dcap)
j<-1 
if (all.vars(modform[[3]])[2]=="j") k <- length(all.vars(modform[[3]]))  else k <- length(all.vars(modform[[3]])) +1
n.mod.full <- vector("list", m)
rho  <- rep(NA, m)
sterr  <- matrix(NA, k, m)
tstat <- matrix(NA, k, m)

for(j in 1:m){
    n.mod.full[[j]] <- glm(modform, data=newdat$imputations[[j]]
         ,family=binomial(link = "probit"), na.action=na.omit)
    varcov <- sandwich(n.mod.full[[j]])
    sterr[,j] <- sqrt(diag(varcov[1:k,1:k]))
    tstat[,j] <- coefficients(n.mod.full[[j]])[1:k]/sterr[,j]
    
        #obtain estimates of rho and sigma
    varre <- t(as.matrix(n.mod.full[[j]]$residuals))%*%as.matrix(n.mod.full[[j]]$residuals)/length(n.mod.full[[j]]$residuals)
    sigsqhat <- varre+dhat.avg[j]*coefficients(n.mod.full[[j]])[2]^2
    rho[j] <- sqrt(coefficients(n.mod.full[[j]])[2]^2/sigsqhat)
}
getcoef <- function(x){
    x$coefficients
}
res <- mi.meld(t(sapply(n.mod.full, getcoef)), t(sterr))
out <- t(rbind(res$q.mi, res$se.mi, res$q.mi/res$se.mi))
sumres(out)
